# -*- coding: utf-8 -*-
'''
Created on 2017-04-09

@author: ZhuJiahui
'''
import os
import time
from global_info.global_nlp import GlobalNLP
from file_utils.file_writer import quick_write_1d_to_text

def collect_unique_words(lines):
    """Return the distinct whitespace-separated tokens found in *lines*.

    Tokens are returned in order of first appearance, so the result is
    deterministic for a given input.

    Args:
        lines: An iterable of strings (e.g. an open text file). Each string
            is split on runs of whitespace; blank lines contribute nothing.

    Returns:
        A list of unique tokens, each appearing exactly once.
    """
    seen = set()      # O(1) membership test instead of rescanning the list
    ordered = []      # preserves first-occurrence order for the output file
    for line in lines:
        # str.split() with no arguments already ignores leading/trailing
        # whitespace, so no explicit strip() is required.
        for token in line.split():
            if token not in seen:
                seen.add(token)
                ordered.append(token)
    return ordered


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()

    # Input and output live under <parent of the working directory>/dataset/sogou/.
    now_directory = os.getcwd()
    root_directory = os.path.dirname(now_directory) + '/'
    read_filename = root_directory + u'dataset/sogou/train_segment_pos.txt'
    write_filename = root_directory + u'dataset/sogou/train_all_word_list.txt'

    # The source file is GBK-encoded; every whitespace-separated token
    # (presumably a word with its POS tag -- confirm against the data) is
    # collected once.
    with open(read_filename, 'r', encoding="gbk") as f:
        word_list = collect_unique_words(f)

    quick_write_1d_to_text(write_filename, word_list)

    print('Total time %f seconds' % (time.perf_counter() - start))